//do \\c3\rdat\SHoude\Research\EEgap\EEgap_scripts\Select_Dominated_Week_Pair_Match_vPaper_08072018.do 
// Session setup: do not stop for --more--, enable -pause- breakpoints, and
// register the data folder that every -use-/-merge-/-save- below is built on.
set more off
pause on
global pathname "raw_data_folder"
  

/*
j=2(1)5
pairs_matched_`j'_size2only_wmiss_kwh
pairs_matched_`j'_size2only_nomiss_kwh
pairs_matched_`j'_size2only_impute_kwh
*/ 

/*
j=2(1)5
pid_pair_j_wmiss_kwh
pid_pair_j_nomiss_kwh
pid_pair_j_impute_kwh
*/

// ---------------------------------------------------------------------------
// Step 1. Load the sales data, keep only pids present in the matched-pair
//         file, screen out price outliers, and reduce the data to one row
//         per pid X day X zipcode.
// File paths are quoted and use "/" so that they survive spaces in $pathname
// and resolve on every platform (Stata on Windows accepts "/" as well).
// ---------------------------------------------------------------------------
use "$pathname/lcidemo_046_2008_2012_allsales", clear
	drop if year==2007
/*	
	sort pid
	merge pid using  $pathname\pairs_matched_4_size2only
	keep if _m==3
	drop _m	

	bys pid datenum zipcode week year: egen promo_zdwy=median(promo_p)
	bys pid datenum week year: egen promo_dwy=median(promo_p)
	bys pid week year: egen promo_wy=median(promo_p)
*/	
  
	// Old-syntax merge on pid; only pids found in both files are kept.
	// NOTE(review): old-syntax -merge- expects the using file to be sorted on
	// pid already -- presumably it was saved that way; confirm.
	sort pid
	merge pid using "$pathname/pairs_matched_2_size2only_manual_check_YQ_SH"
	//merge pid using  $pathname\pairs_matched_4_size2restricted
	keep if _m==3
	drop _m		
		
	// Price-outlier screen. Only rows with o_qty equal to 1 or missing are kept.
	keep if o_qty==1 | o_qty==.
	bys pid: egen promo_avg=mean(promo_p)
	gen ratio_out=promo_p/promo_avg
	// Drop rows priced above 3x or below 0.333x the pid's mean promo price.
	// NOTE(review): Stata orders missing above every number, so rows whose
	// ratio is missing (missing promo_p, or a zero/missing mean) satisfy
	// ratio_out>3 and are dropped too. This matches the previous behaviour of
	// this script; confirm it is intended.
	drop if ratio_out>3 | ratio_out<0.333
		
	// One row per pid X day X zipcode: units are summed, prices are medians.
	collapse (sum) o_qty (median) retail_p promo_p, by(pid datenum zipcode)

/*
Create a panel structure in which missing daily prices are filled in for a given pid in a given zipcode.
The identifier for a dominated price event is at the pair X week-year X zipcode level.

Prices are imputed using observed sales. In the absence of sales for a pid on a given day:
  - the price used first is the price on the same day in other zip codes;
  - the price used second is the weekly price in the same zip code;
  - the price used third is the weekly price across all zip codes.
*/	
	// Panel identifier: one series per pid X zipcode combination.
	// Variable names are written out in full (zipcode, not the abbreviation
	// zip) so the step also runs with -set varabbrev off-.
	egen pid_zip=group(pid zipcode)

	// Side file keyed by pid_zip holding total units per pid X zipcode. It is
	// also the lookup used further down to restore pid and zipcode on the rows
	// that -tsfill- adds.
preserve
	collapse (sum) o_qty, by(pid zipcode pid_zip)
	rename o_qty sales_pid_zip
	sort pid_zip
	save "$pathname/sales_pid_zip_pairs_matched_2_size2only_manual_check_YQ_SH", replace
restore
	
	// Declare the daily panel and let -tsfill- add a row for every gap in each
	// pid_zip series; the added rows carry missing values.
	tsset pid_zip datenum 
	tsfill
	gen week=week(datenum)
	gen year=year(datenum)

	// Drop pid/zipcode and re-attach them (plus sales_pid_zip) from the side
	// file so the rows added by -tsfill- get them as well.
	drop pid zipcode
	sort pid_zip
	//merge pid_zip using $pathname\sales_pid_zip_pairs_matched_4_size2restricted
	merge pid_zip using "$pathname/sales_pid_zip_pairs_matched_2_size2only_manual_check_YQ_SH"
	tab _m
	drop _m
	
	
//Identify week-zipcode where dominated price events (price of the most efficient model is cheaper) occurred
	// Re-attach the pair file by pid and keep matched pids only.
	// Presumably this is where pair_high_ee and pid_pair_2_wmiss_kwh (used
	// further down) come from; they are not created in this script -- confirm.
	// The path is quoted and uses "/" so it tolerates spaces in $pathname and
	// resolves on every platform.
	sort pid
	//merge pid using $pathname\pairs_matched_4_size2restricted
	merge pid using "$pathname/pairs_matched_2_size2only_manual_check_YQ_SH"
	tab _m
	keep if _m==3
	drop _m

	// Median promo price of each pid at three levels of aggregation:
	//   promo_zwy : same zipcode, same week-year
	//   promo_dwy : same day, all zipcodes
	//   promo_wy  : same week-year, all zipcodes
	bys pid zipcode week year: egen promo_zwy=median(promo_p)
	bys pid datenum week year: egen promo_dwy=median(promo_p)
	bys pid week year: egen promo_wy=median(promo_p)
	
	// Build promo_high and promo_low: the price of the pair_high_ee==1 and the
	// pair_high_ee==0 member of each pair, made available on every row of the pair.
	// The same recipe is applied to both sides, high first and then low.
	local ee_high 1
	local ee_low  0
	foreach side in high low {
		// Own-row price for pids on this side; when missing, fall back to the
		// pid's median for the same day, then same zipcode-week, then week.
		gen promo_`side'_temp=promo_p if pair_high_ee==`ee_`side''
		foreach fill in promo_dwy promo_zwy promo_wy {
			replace promo_`side'_temp=`fill' if promo_`side'_temp==. & pair_high_ee==`ee_`side''
		}

		// Pair-level medians of that price, from the finest cell to the coarsest.
		bys pid_pair_2_wmiss_kwh datenum zipcode week year: egen promo_`side'_dzwy=median(promo_`side'_temp)
		bys pid_pair_2_wmiss_kwh datenum week year: egen promo_`side'_dwy=median(promo_`side'_temp)
		bys pid_pair_2_wmiss_kwh zipcode week year: egen promo_`side'_zwy=median(promo_`side'_temp)
		bys pid_pair_2_wmiss_kwh week year: egen promo_`side'_wy=median(promo_`side'_temp)

		// Use the finest non-missing level.
		gen promo_`side'=promo_`side'_dzwy
		foreach level in dwy zwy wy {
			replace promo_`side'=promo_`side'_`level' if promo_`side'==.
		}
	}
	
	// Relative and absolute gap between the high- and low-efficiency prices.
	gen pratio_high_low=promo_high/promo_low
	gen delta_price=promo_high-promo_low
	
	// Dominated: the high-efficiency model costs no more than the low one
	// (ratio <= 1). The flag is raised on pair_high_ee==1 rows and then spread
	// to every row of the cell with max().
	// NOTE(review): datenum is in the by-list, so despite the "_week" names the
	// flags (and the collapsed file) are per pair X zipcode X day -- confirm
	// this is the intended granularity.
	gen dominated_week_tmp=0
	replace dominated_week_tmp=1 if pratio_high_low<=1 & pair_high_ee==1 & pratio_high_low!=.
	bys pid_pair_2_wmiss_kwh zipcode datenum week year: egen dominated_week=max(dominated_week_tmp)
	
	// Not dominated: ratio strictly above 1. The explicit !=. guard matters
	// here because a missing ratio would otherwise compare as greater than 1.
	gen not_dominated_week_tmp=0
	replace not_dominated_week_tmp=1 if pratio_high_low>1 & pair_high_ee==1 & pratio_high_low!=.
	bys pid_pair_2_wmiss_kwh zipcode datenum week year: egen not_dominated_week=max(not_dominated_week_tmp)  
	                                	
	// One row per pair X zipcode X day; every other variable is discarded here.
	collapse (mean) not_dominated_week dominated_week pratio_high_low delta_price promo_low promo_high, by(pid_pair_2_wmiss_kwh zipcode datenum week year)
	sort pid_pair_2_wmiss_kwh  zipcode datenum week year	
// Path is quoted and uses "/" so it tolerates spaces in $pathname and resolves
// on every platform.
save "$pathname/Dominated_Week_pairs_matched_2_size2only_manual_check_YQ_SH", replace	

tab dominated_week


	
